#' Question tag root
#'
#' Given a vector of question names, \code{get_root} returns the
#' original, non-transformed corresponding tags. Note, ordinalized
#' versions of the HLIs are considered separate question tags, since
#' they have alternative entries in the codebook.
#'
#' @param x CharacterVector of question names. For example, the column
#'     names from the final V-Dem DS. Other inputs are coerced with
#'     \code{as.character}.
#'
#' @section Warning: Here be dragons. We will only transform V-Dem
#'     variable tags, meaning that we'll attempt to match \code{v\\d}
#'     or \code{e_v\\d}, while always excluding the direct democracy
#'     variables (v2dd* or v2xdd*) since we never create separate
#'     versions of those.
#'
#' @return Unnamed CharacterVector of the same length as \code{x}.
#'     Names that are not matched as V-Dem variable tags are returned
#'     unchanged; an empty input gives \code{character(0)}.
#'
#' @examples
#' get_root(c("v2clacfree", "v2clacfree_osp", "v2x_freexp_codehigh"))
#'
#' @export
get_root <- function(x) {
    # Coerce once so the result is always a plain character vector,
    # including for zero-length input.
    x <- as.character(x)

    # Candidates start with "v<digit>" (optionally prefixed by "e_") and
    # are not followed by "dd" or "xdd" (the direct democracy variables).
    # NA elements never match and are therefore left as they are.
    is_vdem <- grepl("^(e_)?v\\d(?!x?dd)", x, perl = TRUE)

    # Drop everything from the first "suffix" underscore onwards. An
    # underscore is NOT treated as the start of a suffix when it
    #   - directly follows a leading "e" (the "e_" prefix),
    #   - directly follows "v<digit>x", "v<digit>x" plus two non-space
    #     characters, or "v<digit>" plus two non-space characters, or
    #   - is followed by "<digit>C", "thick" or "altinf".
    x[is_vdem] <- sub("(?<!^e|v\\dx|v\\dx\\S{2}|v\\d\\S{2})_(?!\\dC|thick|altinf).*$",
                      "", x[is_vdem], perl = TRUE)
    x
}

#' Historical/Contemporary questions
#'
#' Test whether question tags are contemporary (v2) or historical
#' (v3), judged by the first two characters of each tag.
#'
#' @param x CharacterVector of question tags
#'
#' @return LogicalVector with one element per tag in \code{x}.
#'
#' @examples
#' is.contemp(c("v2clacfree", "v3clacfree"))
#' is.hist(c("v2csreprss", "v3csreprss"))
#'
#' @export
is.contemp <- function(x) {
    # The survey generation is encoded in the two leading characters.
    prefix <- substr(x, 1L, 2L)
    prefix == "v2"
}

#' @rdname is.contemp
#' @export
is.hist <- function(x) {
    # Historical tags carry "v3" as their two leading characters.
    prefix <- substr(x, 1L, 2L)
    prefix == "v3"
}

#' Check if a variable exists for both contemporary and historical
#'
#' \code{is.shared_tag} takes a vector of variable tag names and returns
#' a logical vector indicating which variables exist in both
#' historical and contemporary.
#'
#' @param tags CharacterVector of v2/v3 tag names. Every tag must be
#'     listed in \code{ttable$name}.
#' @param ttable Translation table listing all contemporary (v2) and
#'     historical (v3) variable tag names (typically our
#'     question_table). Must have a \code{name} column.
#'
#' @section Warning: \code{is.shared_tag} works only with v2/v3 tag
#'     names. So older variable names lacking the necessary prefix
#'     will always return FALSE, which is actually what we want
#'     because they only exist in the deprecated contemporary surveys.
#'
#' @examples
#' vars <- c("v2clacfree", "v3clacfree", "v3elage", "v2elpdcamp")
#' ttable <- data.frame(name = vars, stringsAsFactors = FALSE)
#'
#' is.shared_tag(vars, ttable)
#'
#' @return LogicalVector with one element per tag; \code{TRUE} where
#'     both the v2 and the v3 version of the tag are listed in
#'     \code{ttable$name}. An error is raised if \code{ttable} has no
#'     \code{name} column or if any tag is not listed in it.
#'
#' @export
is.shared_tag <- function(tags, ttable) {
    if (!"name" %in% colnames(ttable))
        stop("Missing name column in ttable")

    known <- ttable[["name"]]

    # Report the caller's expression for `tags` so the message points at
    # the offending object.
    if (!all(tags %in% known))
        stop("Missing values in ttable from ", deparse(substitute(tags)))

    # paste0() recycles zero-length input to "", which would turn an
    # empty `tags` into a length-one result; return early instead.
    if (length(tags) == 0L)
        return(logical(0))

    # Drop the two leading characters (the v2/v3 prefix) and check that
    # both prefixed spellings of the remainder are listed.
    roots <- substring(tags, 3)
    paste0("v3", roots) %in% known & paste0("v2", roots) %in% known
}

#' Normalize historical \code{question_id}s
#'
#' Given a vector of \code{question_id}s, \code{normalize_qids}
#' replaces the historical \code{question_id}s with the analogous
#' contemporary \code{question_id}s where there's a match based on
#' \code{ttable}.
#'
#' @param ids NumericVector of \code{question_id}s.
#' @param ttable \code{question_id} translation table (\emph{e.g.},
#'     the question table).
#'
#' @details \code{normalize_qids} is fairly inflexible and is meant to
#'     work directly with the question table. Thus, \code{ttable}
#'     requires two columns: \code{name} and \code{question_id}. We
#'     determine whether an historical \code{question_id} has a
#'     matching contemporary \code{question_id} by checking the root
#'     tag --- the portion of the tag after removing the \code{v\\d}
#'     prefix. An id is only replaced when the contemporary tag is
#'     listed in \code{ttable} and is also among the tags of the
#'     supplied \code{ids}.
#'
#' @section Warning: We are currently not checking whether matching
#'     historical and contemporary \code{question_id}s have the same
#'     \code{K} (\emph{i.e.}, number of answer categories). This is
#'     mostly because our information on \code{K} is fairly
#'     incomplete; plus, there are a number of variables such as
#'     \code{v3lgbicam} which diverge from contemporary and yet we
#'     still want to merge them together.
#'
#'     This is something that can be improved in the future.
#'
#' @return \code{ids} with the replaceable entries overwritten by the
#'     looked-up contemporary \code{question_id}s; all other entries
#'     are left untouched.
#'
#' @examples
#' ttable <- data.frame(question_id = 1:3,
#'                      name = c("v2clacfree", "v3clacfree", "v3strenadm"),
#'                      stringsAsFactors = FALSE)
#'
#' normalize_qids(1:3, ttable)
#'
#' @export
normalize_qids <- function(ids, ttable) {
    if (is.null(ttable))
        stop("Missing translation table")

    # Look up the tag for every id, then spell each tag the contemporary
    # way by swapping the first "v3" for "v2".
    # NOTE(review): `trans` is defined elsewhere; presumably it maps
    # values from the `question_id` column to the `name` column (and
    # back below) --- confirm against its definition.
    id_tags <- trans(ids, "name", ttable, "question_id")
    v2_tags <- sub("v3", "v2", id_tags)

    # Replaceable entries: the contemporary tag is listed in ttable and
    # also occurs among the tags of the supplied ids.
    replaceable <- v2_tags %in% ttable$name & v2_tags %in% id_tags
    ids[replaceable] <- trans(v2_tags[replaceable], "question_id", ttable, "name")

    # Disabled consistency check on `k`, kept for reference.
    # assertion exception for v2lgbicam / v3lgbicam
    #ids_test <- ids
    #ids_test<- ifelse(ids_test == 1379,
    #                      595,
    #                      ids_test)
    #stopifnot(identical(trans(ids_test, "k", ttable, "question_id"),
    #                    trans(new_ids, "k", ttable, "question_id")))

    ids
}

#' Get survey from question tag
#'
#' Given a vector of question tags, \code{get_survey} returns the
#' two letter survey abbreviation for A*, A, B, and C-variables
#'
#' @param x CharacterVector of question tags. Every tag must consist
#'     of \code{v}, a single digit, and then lowercase letters only.
#'
#' @return CharacterVector holding the third and fourth characters of
#'     each tag. An error is raised if any element of \code{x} does
#'     not have the expected form.
#'
#' @examples
#' get_survey(c("v2clacfree", "v2dlcountr"))
#'
#' @export
get_survey <- function(x) {
    # Anything with an underscore, an "e_" prefix, digits after the
    # survey generation, or NA fails this pattern and is rejected.
    is_valid <- grepl("^v\\d[a-z]+$", x)
    if (!all(is_valid))
        stop("Invalid input, expected an A*, A, B, C tag name.", call. = FALSE)

    # The survey abbreviation sits right after the "v<digit>" prefix.
    substr(x, 3, 4)
}
